#!/usr/bin/env python3
# Copyright 2020 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Tests for detokenize."""

import base64
import datetime as dt
import io
import os
import struct
import tempfile
import unittest
from unittest import mock

from pw_tokenizer import database
from pw_tokenizer import detokenize
from pw_tokenizer import elf_reader
from pw_tokenizer import tokens


# This function is not part of this test. It was used to generate the binary
# strings for EMPTY_ELF and ELF_WITH_TOKENIZER_SECTIONS. It takes a path and
# returns a Python byte string suitable for copying into Python source code.
def path_to_byte_string(path):
    """Reads a binary file and renders it as Python bytes-literal source.

    Args:
      path: Path of the file to read.

    Returns:
      The file's contents formatted as indented b'...' lines suitable for
      pasting into Python source code (e.g. EMPTY_ELF below). Returns an
      empty string for an empty file.
    """
    with open(path, 'rb') as fd:
        data = fd.read()

    output = []
    line = ''

    for i in range(len(data)):
        # Render one byte as it appears inside a bytes repr, escaping single
        # quotes so the result can be wrapped in b'...'.
        line += repr(data[i:i + 1])[2:-1].replace("'", r'\'')

        # Start a new output line once this one reaches the target width.
        if len(line) >= 70:
            output.append("    b'{}'\n".format(line))
            line = ''

    if line:  # Flush any final partial line.
        output.append("    b'{}'\n".format(line))

    return ''.join(output)


# This is an empty ELF file. It was created from the ELF file for
# tokenize_test.cc with the command:
#
#   arm-none-eabi-objcopy -S --only-section NO_SECTIONS_PLEASE <ELF> <OUTPUT>
#
# The resulting ELF was converted to a Python binary string using
# path_to_byte_string function above.
EMPTY_ELF: bytes = (
    b'\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00(\x00\x01'
    b'\x00\x00\x00\xd1\x83\x00\x084\x00\x00\x00\xe0\x00\x00\x00\x00\x04\x00\x05'
    b'4\x00 \x00\x05\x00(\x00\x02\x00\x01\x00\x01\x00\x00\x00\xd4\x00\x00\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x07\x00'
    b'\x00\x00\x00\x00\x01\x00\x01\x00\x00\x00\xd4\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00'
    b'\x01\x00\x01\x00\x00\x00\xd4\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x01\x00\x01\x00'
    b'\x00\x00\xd4\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x01\x00\x01\x00\x00\x00\xd4\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    b'\x06\x00\x00\x00\x00\x00\x01\x00\x00.shstrtab\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01'
    b'\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd4\x00\x00'
    b'\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00'
    b'\x00\x00\x00')

# This is an ELF file with only .tokenized and .tokenizer_info sections.
# It was created from the ELF file for tokenize_test.cc with the command:
#
#   arm-none-eabi-objcopy -S --only-section ".tokenize*" <ELF> <OUTPUT>
#
# The resulting ELF was converted to a Python binary string using
# path_to_byte_string function above. The file is also included in the repo as
# example_binary_with_tokenized_logs.elf.
#
# Loading this ELF produces a token database with 16 entries (see the
# assertions in AutoUpdatingDetokenizerTest.test_update and
# DetokenizeTest.test_decode_from_csv_file).
ELF_WITH_TOKENIZER_SECTIONS: bytes = (
    b'\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00(\x00\x01'
    b'\x00\x00\x00\xd1\x83\x00\x084\x00\x00\x00\x04\x03\x00\x00\x00\x04\x00\x05'
    b'4\x00 \x00\x05\x00(\x00\x04\x00\x03\x00\x01\x00\x00\x00\xd4\x00\x00\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x07\x00'
    b'\x00\x00\x00\x00\x01\x00\x01\x00\x00\x00\xd4\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00'
    b'\x01\x00\x01\x00\x00\x00\xd4\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x01\x00\x01\x00'
    b'\x00\x00\xd4\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x01\x00\x01\x00\x00\x00\xd4\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    b'\x06\x00\x00\x00\x00\x00\x01\x00%llu\x00\x00\x00\x00%llx\x00\x00\x00\x00'
    b'%u %d\x00\x00\x00The answer: "%s"\x00\x00\x00\x00Jello, world!\x00\x00'
    b'\x00Jello!\x00\x00Jello?\x00\x00%s there are %x (%.2f) of them%c\x00\x00'
    b'\x00\x00The answer is: %s\x00\x00\x00%x%lld%1.2f%s\x00\x00\x00The answ'
    b'er is: %s\x00\x00\x00%ld\x00%d\x00\x00%ld\x00The answer is: %s\x00\x00'
    b'\x00The answer is: %s\x00\x00\x00The answer is: %s\x00\x00\x00The answ'
    b'er is: %s\x00\x00\x00The answer is: %s\x00\x00\x00Hello %s! %hd %e\x00'
    b'\x00\x00\x00%u%d%02x%X%hu%hhu%d%ld%lu%lld%llu%c%c%c\x00%u%d%02x%X%hu%h'
    b'hu%d%ld%lu%lld%llu%c%c%c\x00%u%d%02x%X%hu%hhu%d%ld%lu%lld%llu%c%c%c\x00'
    b'Won\'t fit : %s%d\x00\x00\x00\x00hash_length\x00`\x00\x00\x00sizeof_l\x00'
    b'\x00\x00\x00\x04\x00\x00\x00sizeof_j\x00\x00\x00\x00\x08\x00\x00\x00si'
    b'zeof_z\x00\x00\x00\x00\x04\x00\x00\x00sizeof_t\x00\x00\x00\x00\x04\x00'
    b'\x00\x00\x00.shstrtab\x00.tokenized\x00.tokenizer_info\x00\x00\x00\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\x00\x00\x0b\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\xd4\x00\x00\x00\xb5\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04'
    b'\x00\x00\x00\x00\x00\x00\x00\x16\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00'
    b'\x00\x00\x00\x00\x00\x8c\x02\x00\x00P\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    b'\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x03\x00\x00\x00'
    b'\x00\x00\x00\x00\x00\x00\x00\x00\xdc\x02\x00\x00&\x00\x00\x00\x00\x00\x00'
    b'\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00')

# 0x2e668cd6 is 'Jello, world!' (which is also used in database_test.py).
# The bytes below are that token value packed in little-endian byte order.
JELLO_WORLD_TOKEN: bytes = b'\xd6\x8c\x66\x2e'


class DetokenizeTest(unittest.TestCase):
    """Tests the detokenize.Detokenizer."""
    def test_simple(self):
        """Decodes a string with several argument types."""
        detok = detokenize.Detokenizer(
            tokens.Database([
                tokens.TokenizedStringEntry(0xcdab, '%02d %s %c%%',
                                            dt.datetime.now())
            ]))
        self.assertEqual(str(detok.detokenize(b'\xab\xcd\0\0\x02\x03Two\x66')),
                         '01 Two 3%')

    def test_detokenize_extra_data_is_unsuccessful(self):
        """Data left over after the arguments makes the result a failure."""
        detok = detokenize.Detokenizer(
            tokens.Database([
                tokens.TokenizedStringEntry(1, 'no args', dt.datetime(1, 1, 1))
            ]))

        result = detok.detokenize(b'\x01\0\0\0\x04args')
        self.assertEqual(len(result.failures), 1)
        string, args, remaining = result.failures[0]
        self.assertEqual('no args', string)
        self.assertFalse(args)
        self.assertEqual(b'\x04args', remaining)
        self.assertEqual('no args', str(result))

    def test_detokenize_missing_data_is_unsuccessful(self):
        """Decoding fails when an argument's data is missing."""
        detok = detokenize.Detokenizer(
            tokens.Database(
                [tokens.TokenizedStringEntry(2, '%s', dt.datetime(1, 1, 1))]))

        result = detok.detokenize(b'\x02\0\0\0')
        string, args, remaining = result.failures[0]
        self.assertEqual('%s', string)
        self.assertEqual(len(args), 1)
        self.assertEqual(b'', remaining)
        self.assertEqual(len(result.failures), 1)
        self.assertEqual('%s', str(result))

    def test_detokenize_missing_data_with_errors_is_unsuccessful(self):
        """With show_errors, a missing argument appears as a MISSING marker."""
        detok = detokenize.Detokenizer(tokens.Database(
            [tokens.TokenizedStringEntry(2, '%s', dt.datetime(1, 1, 1))]),
                                       show_errors=True)

        result = detok.detokenize(b'\x02\0\0\0')
        string, args, remaining = result.failures[0]
        self.assertIn('%s MISSING', string)
        self.assertEqual(len(args), 1)
        self.assertEqual(b'', remaining)
        self.assertEqual(len(result.failures), 1)
        self.assertIn('%s MISSING', str(result))

    def test_unparsed_data(self):
        """Unparsed trailing data is reported in repr and error_message."""
        detok = detokenize.Detokenizer(
            tokens.Database([
                tokens.TokenizedStringEntry(1, 'no args',
                                            dt.datetime(100, 1, 1)),
            ]))
        result = detok.detokenize(b'\x01\0\0\0o_o')
        self.assertFalse(result.ok())
        self.assertEqual('no args', str(result))
        self.assertIn('o_o', repr(result))
        self.assertIn('decoding failed', result.error_message())

    def test_empty_db(self):
        """Unknown tokens decode to an empty string without show_errors."""
        detok = detokenize.Detokenizer(io.BytesIO(EMPTY_ELF))
        self.assertFalse(detok.detokenize(b'\x12\x34\0\0').ok())
        self.assertIn('unknown token',
                      detok.detokenize(b'1234').error_message())
        self.assertIn('unknown token', repr(detok.detokenize(b'1234')))
        self.assertEqual('', str(detok.detokenize(b'1234')))

        self.assertIsNone(detok.detokenize(b'').token)

    def test_empty_db_show_errors(self):
        """With show_errors, unknown tokens appear in str() as well."""
        detok = detokenize.Detokenizer(io.BytesIO(EMPTY_ELF), show_errors=True)
        self.assertFalse(detok.detokenize(b'\x12\x34\0\0').ok())
        self.assertIn('unknown token',
                      detok.detokenize(b'1234').error_message())
        self.assertIn('unknown token', repr(detok.detokenize(b'1234')))
        self.assertIn('unknown token', str(detok.detokenize(b'1234')))

        self.assertIsNone(detok.detokenize(b'').token)

    def test_missing_token_show_errors(self):
        """With show_errors, too-short data reports a missing token."""
        detok = detokenize.Detokenizer(io.BytesIO(EMPTY_ELF), show_errors=True)
        self.assertIn('missing token', detok.detokenize(b'').error_message())
        self.assertIn('missing token', str(detok.detokenize(b'')))
        self.assertIn('missing token', repr(detok.detokenize(b'123')))

        self.assertIn('missing token', detok.detokenize(b'1').error_message())
        self.assertIn('missing token', str(detok.detokenize(b'1')))
        self.assertIn('missing token', repr(detok.detokenize(b'1')))

        self.assertIn('missing token',
                      detok.detokenize(b'123').error_message())
        self.assertIn('missing token', str(detok.detokenize(b'123')))
        self.assertIn('missing token', repr(detok.detokenize(b'123')))

    def test_missing_token(self):
        """Without show_errors, too-short data decodes to an empty str()."""
        detok = detokenize.Detokenizer(io.BytesIO(EMPTY_ELF))
        self.assertIn('missing token', detok.detokenize(b'').error_message())
        self.assertEqual('', str(detok.detokenize(b'')))
        self.assertIn('missing token', repr(detok.detokenize(b'123')))

        self.assertIn('missing token', detok.detokenize(b'1').error_message())
        self.assertEqual('', str(detok.detokenize(b'1')))
        self.assertIn('missing token', repr(detok.detokenize(b'1')))

        self.assertIn('missing token',
                      detok.detokenize(b'123').error_message())
        self.assertEqual('', str(detok.detokenize(b'123')))
        self.assertIn('missing token', repr(detok.detokenize(b'123')))

    def test_decode_from_elf_data(self):
        """Decodes tokens from a database loaded from in-memory ELF data."""
        detok = detokenize.Detokenizer(io.BytesIO(ELF_WITH_TOKENIZER_SECTIONS))

        self.assertTrue(detok.detokenize(JELLO_WORLD_TOKEN).ok())
        self.assertEqual(str(detok.detokenize(JELLO_WORLD_TOKEN)),
                         'Jello, world!')

        undecoded_args = detok.detokenize(JELLO_WORLD_TOKEN + b'some junk')
        self.assertFalse(undecoded_args.ok())
        self.assertEqual(str(undecoded_args), 'Jello, world!')

        self.assertTrue(detok.detokenize(b'\0\0\0\0').ok())
        self.assertEqual(str(detok.detokenize(b'\0\0\0\0')), '')

    def test_decode_from_elf_file(self):
        """Accepts an ELF database as a file object, path, or Elf object."""
        detok = detokenize.Detokenizer(io.BytesIO(ELF_WITH_TOKENIZER_SECTIONS))
        expected_tokens = frozenset(detok.database.token_to_entries.keys())

        with tempfile.NamedTemporaryFile() as elf:
            elf.write(ELF_WITH_TOKENIZER_SECTIONS)
            elf.seek(0)

            # Open ELF by file object
            detok = detokenize.Detokenizer(elf)
            self.assertEqual(expected_tokens,
                             frozenset(detok.database.token_to_entries.keys()))

            # Open ELF by path
            detok = detokenize.Detokenizer(elf.name)
            self.assertEqual(expected_tokens,
                             frozenset(detok.database.token_to_entries.keys()))

            # Open ELF by elf_reader.Elf
            elf.seek(0)
            detok = detokenize.Detokenizer(elf_reader.Elf(elf))
            self.assertEqual(expected_tokens,
                             frozenset(detok.database.token_to_entries.keys()))

    def test_decode_from_csv_file(self):
        """Accepts a CSV database by path or by file object."""
        detok = detokenize.Detokenizer(io.BytesIO(ELF_WITH_TOKENIZER_SECTIONS))
        expected_tokens = frozenset(detok.database.token_to_entries.keys())

        csv_database = str(detok.database)
        self.assertEqual(len(csv_database.splitlines()), 16)

        with tempfile.NamedTemporaryFile('r+') as csv_file:
            csv_file.write(csv_database)
            csv_file.seek(0)

            # Open CSV by path
            detok = detokenize.Detokenizer(csv_file.name)
            self.assertEqual(expected_tokens,
                             frozenset(detok.database.token_to_entries.keys()))

            # Open CSV by file object
            detok = detokenize.Detokenizer(csv_file)
            self.assertEqual(expected_tokens,
                             frozenset(detok.database.token_to_entries.keys()))

    def test_create_detokenizer_with_token_database(self):
        """Accepts an already-loaded token database directly."""
        detok = detokenize.Detokenizer(io.BytesIO(ELF_WITH_TOKENIZER_SECTIONS))
        expected_tokens = frozenset(detok.database.token_to_entries.keys())

        detok = detokenize.Detokenizer(detok.database)
        self.assertEqual(expected_tokens,
                         frozenset(detok.database.token_to_entries.keys()))


class DetokenizeWithCollisions(unittest.TestCase):
    """Tests collision resolution."""
    def setUp(self):
        super().setUp()
        colliding_token = 0xbaad

        # Build a database in which several strings share the same token.
        entries = [
            tokens.TokenizedStringEntry(colliding_token, 'REMOVED',
                                        dt.datetime(9, 1, 1)),
            tokens.TokenizedStringEntry(colliding_token, 'newer'),
            tokens.TokenizedStringEntry(colliding_token, 'A: %d',
                                        dt.datetime(30, 5, 9)),
            tokens.TokenizedStringEntry(colliding_token, 'B: %c',
                                        dt.datetime(30, 5, 10)),
            tokens.TokenizedStringEntry(colliding_token, 'C: %s'),
            tokens.TokenizedStringEntry(colliding_token, '%d%u'),
            tokens.TokenizedStringEntry(colliding_token, '%s%u %d'),
            tokens.TokenizedStringEntry(1, '%s'),
            tokens.TokenizedStringEntry(1, '%d'),
            tokens.TokenizedStringEntry(2, 'Three %s %s %s'),
            tokens.TokenizedStringEntry(2, 'Five %d %d %d %d %s'),
        ]
        self.detok = detokenize.Detokenizer(tokens.Database(entries))

    def test_collision_no_args_favors_most_recently_present(self):
        result = self.detok.detokenize(b'\xad\xba\0\0')
        self.assertFalse(result.ok())
        self.assertEqual(len(result.successes), 2)
        self.assertEqual(len(result.failures), 5)
        self.assertEqual(len(result.matches()), 7)
        self.assertEqual(str(result), 'newer')
        self.assertEqual(len(result.best_result()[1]), 0)
        self.assertEqual(result.best_result()[0], 'newer')

    def test_collision_one_integer_arg_favors_most_recently_present(self):
        result = self.detok.detokenize(b'\xad\xba\0\0\x7a')
        self.assertFalse(result.ok())
        self.assertIn('ERROR', repr(result))
        self.assertEqual(len(result.successes), 2)
        self.assertEqual(len(result.failures), 5)
        self.assertEqual(len(result.matches()), 7)
        self.assertEqual(str(result), 'B: =')

    def test_collision_one_integer_arg_favor_successful_decode(self):
        # Exactly one string decodes successfully here: the argument is out
        # of range for %c, leaving only the %d entry.
        result = self.detok.detokenize(b'\xad\xba\0\0\xfe\xff\xff\xff\x0f')
        self.assertTrue(result.ok())
        self.assertEqual(str(result), 'A: 2147483647')

    def test_collision_one_string_arg_favors_successful_decode(self):
        # Decoding the argument as an integer leaves data behind, so only
        # the %s entry decodes successfully.
        result = self.detok.detokenize(b'\xad\xba\0\0\x02Hi')
        self.assertTrue(result.ok())
        self.assertEqual(str(result), 'C: Hi')

    def test_collision_one_string_arg_favors_decoding_all_data(self):
        result = self.detok.detokenize(b'\1\0\0\0\x83hi')
        self.assertEqual(len(result.failures), 2)
        # Resolves to the string, since %d would leave one byte behind.
        self.assertEqual(str(result), '%s')

    def test_collision_multiple_args_favors_decoding_more_arguments(self):
        result = self.detok.detokenize(b'\2\0\0\0\1\2\1\4\5')
        self.assertEqual(len(result.matches()), 2)
        self.assertEqual(result.matches()[0][0], 'Five -1 1 -1 2 %s')
        self.assertEqual(result.matches()[1][0], 'Three \2 \4 %s')

    def test_collision_multiple_args_favors_decoding_all_arguments(self):
        result = self.detok.detokenize(b'\xad\xba\0\0\x01#\x00\x01')
        self.assertTrue(result.ok())
        self.assertEqual(len(result.matches()), 7)
        self.assertEqual('#0 -1', str(result))
        self.assertIn('#0 -1', repr(result))
        self.assertIn('#0 -1', repr(unambiguous))


@mock.patch('os.path.getmtime')
class AutoUpdatingDetokenizerTest(unittest.TestCase):
    """Tests the AutoUpdatingDetokenizer class."""
    def test_update(self, mock_getmtime):
        """The database reloads when the file's modification time changes."""
        db = database.load_token_database(
            io.BytesIO(ELF_WITH_TOKENIZER_SECTIONS))
        self.assertEqual(len(db), 16)

        fake_time = [100]

        def fake_getmtime(path):
            # Report an ever-earlier mtime so the file always looks modified,
            # but only while it actually exists on disk.
            if not os.path.exists(path):
                raise FileNotFoundError

            fake_time[0] -= 1
            return fake_time[0]

        mock_getmtime.side_effect = fake_getmtime

        with tempfile.NamedTemporaryFile('wb', delete=True) as file_:
            detok = detokenize.AutoUpdatingDetokenizer(file_.name,
                                                       min_poll_period_s=0)
            self.assertFalse(detok.detokenize(JELLO_WORLD_TOKEN).ok())

            tokens.write_binary(db, file_)
            file_.flush()

            self.assertTrue(detok.detokenize(JELLO_WORLD_TOKEN).ok())

        # The database stays around if the file is deleted.
        self.assertTrue(detok.detokenize(JELLO_WORLD_TOKEN).ok())

    def test_no_update_if_time_is_same(self, mock_getmtime):
        """No reload occurs while the modification time stays the same."""
        mock_getmtime.return_value = 100

        with tempfile.NamedTemporaryFile('wb', delete=True) as file_:
            tokens.write_csv(
                database.load_token_database(
                    io.BytesIO(ELF_WITH_TOKENIZER_SECTIONS)), file_)
            file_.flush()

            detok = detokenize.AutoUpdatingDetokenizer(file_,
                                                       min_poll_period_s=0)
            self.assertTrue(detok.detokenize(JELLO_WORLD_TOKEN).ok())

            # Empty the database, but keep the modified time the same.
            file_.truncate(0)
            file_.flush()
            self.assertTrue(detok.detokenize(JELLO_WORLD_TOKEN).ok())
            self.assertTrue(detok.detokenize(JELLO_WORLD_TOKEN).ok())

            # Move back time so the now-empty file is reloaded.
            mock_getmtime.return_value = 50
            self.assertFalse(detok.detokenize(JELLO_WORLD_TOKEN).ok())


def _next_char(message):
    return bytes(b + 1 for b in message)


class PrefixedMessageDecoderTest(unittest.TestCase):
    """Tests detokenize.PrefixedMessageDecoder's transform method."""
    def setUp(self):
        super().setUp()
        self.decoder = detokenize.PrefixedMessageDecoder('$', 'abcdefg')

    def _transform(self, data):
        # Runs the decoder over data, transforming messages with _next_char.
        return b''.join(
            self.decoder.transform(io.BytesIO(data), _next_char))

    def test_transform_single_message(self):
        self.assertEqual(b'%bcde', self._transform(b'$abcd'))

    def test_transform_message_amidst_other_only_affects_message(self):
        self.assertEqual(
            b'%%WHAT?%bcd%WHY? is this %ok %',
            self._transform(b'$$WHAT?$abc$WHY? is this $ok $'))

    def test_transform_empty_message(self):
        self.assertEqual(b'%1%', self._transform(b'$1$'))

    def test_transform_sequential_messages(self):
        self.assertEqual(b'%bcd%efghh', self._transform(b'$abc$defgh'))


class DetokenizeBase64(unittest.TestCase):
    """Tests detokenizing Base64 messages."""

    JELLO = b'$' + base64.b64encode(JELLO_WORLD_TOKEN)

    RECURSION_STRING = f'The secret message is "{JELLO.decode()}"'
    RECURSION = b'$' + base64.b64encode(
        struct.pack('I', tokens.default_hash(RECURSION_STRING)))

    RECURSION_STRING_2 = f"'{RECURSION.decode()}', said the spy."
    RECURSION_2 = b'$' + base64.b64encode(
        struct.pack('I', tokens.default_hash(RECURSION_STRING_2)))

    # (input message, expected detokenized output) pairs.
    TEST_CASES = (
        (b'', b''),
        (JELLO, b'Jello, world!'),
        (b'Hello ' + JELLO + b'?', b'Hello Jello, world!?'),
        (b'$' + JELLO, b'$Jello, world!'),
        (JELLO + JELLO, b'Jello, world!Jello, world!'),
        (JELLO + b'$' + JELLO, b'Jello, world!$Jello, world!'),
        (b'$3141', b'$3141'),
        (JELLO + b'$3141', b'Jello, world!$3141'),
        (RECURSION, b'The secret message is "Jello, world!"'),
        (RECURSION_2,
         b'\'The secret message is "Jello, world!"\', said the spy.'),
    )

    def setUp(self):
        super().setUp()
        token_db = database.load_token_database(
            io.BytesIO(ELF_WITH_TOKENIZER_SECTIONS))
        token_db.add([self.RECURSION_STRING, self.RECURSION_STRING_2])
        self.detok = detokenize.Detokenizer(token_db)

    def test_detokenize_base64_live(self):
        for message, expected in self.TEST_CASES:
            sink = io.BytesIO()
            detokenize.detokenize_base64_live(self.detok, io.BytesIO(message),
                                              sink, '$')

            self.assertEqual(expected, sink.getvalue())

    def test_detokenize_base64_to_file(self):
        for message, expected in self.TEST_CASES:
            sink = io.BytesIO()
            detokenize.detokenize_base64_to_file(self.detok, message, sink,
                                                 '$')

            self.assertEqual(expected, sink.getvalue())

    def test_detokenize_base64(self):
        for message, expected in self.TEST_CASES:
            self.assertEqual(
                expected,
                detokenize.detokenize_base64(self.detok, message, b'$'))


class DetokenizeBase64InfiniteRecursion(unittest.TestCase):
    """Tests that infinite Base64 token recursion resolves."""
    def setUp(self):
        super().setUp()
        # Each string is itself a Base64 token message, so detokenizing could
        # recurse indefinitely without a depth limit.
        self.detok = detokenize.Detokenizer(
            tokens.Database([
                tokens.TokenizedStringEntry(0, '$AAAAAA=='),  # token for 0
                tokens.TokenizedStringEntry(1, '$AgAAAA=='),  # token for 2
                tokens.TokenizedStringEntry(2, '$AwAAAA=='),  # token for 3
                tokens.TokenizedStringEntry(3, '$AgAAAA=='),  # token for 2
            ]))

    def _decode(self, message, **kwargs):
        # Runs Base64 detokenization against this test's cyclic database.
        return detokenize.detokenize_base64(self.detok, message, **kwargs)

    def test_detokenize_self_recursion(self):
        message = b'This one is deep: $AAAAAA=='
        for depth in range(5):
            self.assertEqual(self._decode(message, recursion=depth), message)

    def test_detokenize_self_recursion_default(self):
        message = b'This one is deep: $AAAAAA=='
        self.assertEqual(self._decode(message), message)

    def test_detokenize_cyclic_recursion_even(self):
        self.assertEqual(self._decode(b'I said "$AQAAAA=="', recursion=2),
                         b'I said "$AgAAAA=="')

    def test_detokenize_cyclic_recursion_odd(self):
        self.assertEqual(self._decode(b'I said "$AQAAAA=="', recursion=3),
                         b'I said "$AwAAAA=="')


if __name__ == '__main__':
    # Discover and run all test cases in this file.
    unittest.main()
